By Hanae Chouali
import os
import sys
import time
# Add the project root to sys.path so the local lib package can be imported
sys.path.insert(0, os.path.abspath('..'))
cd
C:\Users\hamza
# Install requirements
!pip3 install -r ProjetML/requirements.txt --user
Requirement already satisfied: numpy~=1.22.0, sklearn~=0.0, scikit-learn~=1.0.2, torch~=1.10.1, tqdm~=4.62.3, pandas~=1.3.5, matplotlib~=3.5.1, seaborn~=0.11.2, and their dependencies (pip output truncated)
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from torch import optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA, KernelPCA
import matplotlib.pyplot as plt
from lib.LogisticRegressionModel import LogisticRegressionModel
from lib.NeuralNetwork import NeuralNetwork
from lib.PCA import PrincipalComponentAnalysis
from lib.RandomForestModel import RandomForestModel
from lib.SVMModel import SVMModel
# Load the data
data = pd.read_csv('ProjetML/data/data.csv', sep=",", index_col=0)
data.head()
| | gene_0 | gene_1 | gene_2 | gene_3 | gene_4 | gene_5 | gene_6 | gene_7 | gene_8 | gene_9 | ... | gene_20521 | gene_20522 | gene_20523 | gene_20524 | gene_20525 | gene_20526 | gene_20527 | gene_20528 | gene_20529 | gene_20530 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| sample_0 | 0.0 | 2.017209 | 3.265527 | 5.478487 | 10.431999 | 0.0 | 7.175175 | 0.591871 | 0.0 | 0.0 | ... | 4.926711 | 8.210257 | 9.723516 | 7.220030 | 9.119813 | 12.003135 | 9.650743 | 8.921326 | 5.286759 | 0.0 |
| sample_1 | 0.0 | 0.592732 | 1.588421 | 7.586157 | 9.623011 | 0.0 | 6.816049 | 0.000000 | 0.0 | 0.0 | ... | 4.593372 | 7.323865 | 9.740931 | 6.256586 | 8.381612 | 12.674552 | 10.517059 | 9.397854 | 2.094168 | 0.0 |
| sample_2 | 0.0 | 3.511759 | 4.327199 | 6.881787 | 9.870730 | 0.0 | 6.972130 | 0.452595 | 0.0 | 0.0 | ... | 5.125213 | 8.127123 | 10.908640 | 5.401607 | 9.911597 | 9.045255 | 9.788359 | 10.090470 | 1.683023 | 0.0 |
| sample_3 | 0.0 | 3.663618 | 4.507649 | 6.659068 | 10.196184 | 0.0 | 7.843375 | 0.434882 | 0.0 | 0.0 | ... | 6.076566 | 8.792959 | 10.141520 | 8.942805 | 9.601208 | 11.392682 | 9.694814 | 9.684365 | 3.292001 | 0.0 |
| sample_4 | 0.0 | 2.655741 | 2.821547 | 6.539454 | 9.738265 | 0.0 | 6.566967 | 0.360982 | 0.0 | 0.0 | ... | 5.996032 | 8.891425 | 10.373790 | 7.181162 | 9.846910 | 11.922439 | 9.217749 | 9.461191 | 5.110372 | 0.0 |
5 rows × 20531 columns
data.info()
<class 'pandas.core.frame.DataFrame'>
Index: 801 entries, sample_0 to sample_800
Columns: 20531 entries, gene_0 to gene_20530
dtypes: float64(20531)
memory usage: 125.5+ MB
# Compute descriptive statistics for the different features
import plotly.graph_objs as go
import plotly.offline as py
p = data.describe().T
p = p.round(4)
table = go.Table(
    columnwidth=[0.8] + [0.5] * 8,
    header=dict(
        values=['Attribute'] + list(p.columns),
        line=dict(color='#506784'),
        fill=dict(color='lightblue'),
    ),
    cells=dict(
        values=[p.index] + [p[k].tolist() for k in p.columns],
        line=dict(color='#506784'),
        fill=dict(color=['rgb(173, 216, 220)', '#f5f5fa'])
    )
)
py.iplot([table], filename='table-of-genes-data')
# Load the label data
labels = pd.read_csv('ProjetML/data/labels.csv', sep=",", index_col=0)
labels.head()
| | Class |
|---|---|
| sample_0 | PRAD |
| sample_1 | LUAD |
| sample_2 | PRAD |
| sample_3 | PRAD |
| sample_4 | BRCA |
# Retrieve the class names
list(labels['Class'].unique())
['PRAD', 'LUAD', 'BRCA', 'KIRC', 'COAD']
# We convert the class names to numeric values through the mapping shown below
labels_unique_values = {
'PRAD': 1,
'LUAD': 2,
'BRCA': 3,
'KIRC': 4,
'COAD': 5
}
labels['Class'] = labels['Class'].map(labels_unique_values)
labels.head()
| | Class |
|---|---|
| sample_0 | 1 |
| sample_1 | 2 |
| sample_2 | 1 |
| sample_3 | 1 |
| sample_4 | 3 |
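# For later convenience, inverting the mapping lets numeric predictions be decoded back
# into class names. A small sketch (the name inverse_labels is ours, not part of the project):
inverse_labels = {v: k for k, v in labels_unique_values.items()}
print(inverse_labels[1])  # 'PRAD'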
# Class distribution
labels.hist(bins=50, figsize=(16, 9))
plt.show()
# Create the training and test sets
X_train, X_test, y_train, y_test = train_test_split(
data,
labels,
test_size=0.2,
random_state=42
)
X_train.shape
(640, 20531)
X_test.shape
(161, 20531)
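# With only 801 samples spread over five classes, a stratified split would keep the class
# proportions identical in the train and test sets. A sketch of that variant (not the call
# actually used above):
X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels['Class']
)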
# Initialize the model
model = PCA()
pca = PrincipalComponentAnalysis(model, X_train, X_test, kernel=False)
# Fit the model
pca.fit(scaler=StandardScaler())
C:\Users\hamza\AppData\Roaming\Python\Python39\site-packages\sklearn\base.py:443: UserWarning: X has feature names, but PCA was fitted without feature names
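# This warning only means that fit and transform saw different input types (a plain NumPy
# array on one side, a DataFrame with column names on the other); it is harmless here.
# A sketch of the equivalent scikit-learn calls on plain arrays, which avoids the mismatch
# (an illustration, not the wrapper's actual code):
scaler_sketch = StandardScaler().fit(X_train.values)
pca_sketch = PCA().fit(scaler_sketch.transform(X_train.values))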
# Compute the ratio of variance explained by each component
pca.test()
Explained variance ratio [1.08538412e-01 8.75017194e-02 7.77490398e-02 5.20043144e-02 4.00733200e-02 ... 1.32379080e-04 1.29052042e-04 1.42178943e-33] (640 values; output truncated)
Cumulative explained variance ratio [0.10853841 0.19604013 0.27378917 0.32579349 0.36586681 ... 0.99987095 1. 1.] (640 values; output truncated)
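# The PrincipalComponentAnalysis class comes from the project's lib/ package, whose source
# is not reproduced in this notebook. Based on how it is used (constructor taking a model
# and both datasets, fit(scaler=...), test(), and the x_train/x_test attributes read below),
# a plausible minimal sketch is the following; this is an assumption, not the actual code:
class PrincipalComponentAnalysisSketch:
    def __init__(self, model, x_train, x_test, kernel=False):
        self.model = model  # a PCA or KernelPCA instance
        self.x_train = x_train
        self.x_test = x_test
        self.kernel = kernel

    def fit(self, scaler):
        # Standardize on the training data, then project both sets
        self.x_train = self.model.fit_transform(scaler.fit_transform(self.x_train))
        self.x_test = self.model.transform(scaler.transform(self.x_test))

    def test(self):
        # Only plain PCA exposes explained_variance_ratio_
        print('Explained variance ratio', self.model.explained_variance_ratio_)
        print('Cumulative explained variance ratio',
              np.cumsum(self.model.explained_variance_ratio_))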
# Plot the cumulative percentage of explained variance against the number of components
plt.plot(np.arange(1, 641), np.cumsum(pca.model.explained_variance_ratio_))
plt.xlabel('Component')
plt.ylabel('Percentage of explained variance')
plt.show()
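# The curve can also be read off programmatically; a small sketch (the 90% threshold is an
# illustrative choice):
cum_var = np.cumsum(pca.model.explained_variance_ratio_)
n_components_90 = int(np.argmax(cum_var >= 0.90)) + 1
print(f'{n_components_90} components explain 90% of the variance')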
# Apply the dimensionality reduction to the training and test data
model = PCA(n_components=3)  # Keep only the first 3 principal components
pca = PrincipalComponentAnalysis(model, X_train, X_test, kernel=False)
pca.fit(scaler=StandardScaler())
x_train_pca = pca.x_train
x_test_pca = pca.x_test
C:\Users\hamza\AppData\Roaming\Python\Python39\site-packages\sklearn\base.py:443: UserWarning: X has feature names, but PCA was fitted without feature names
# Correlation matrix to check that the new axes are decorrelated
plt.figure(figsize=(10, 5))
sns.heatmap(
    pd.DataFrame(
        x_train_pca,
        columns=['1st principal comp.', '2nd principal comp.', '3rd principal comp.']
    ).corr(), annot=True, cmap='coolwarm'
)
plt.show()
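# PCA components are orthogonal by construction, so the off-diagonal correlations should be
# numerically close to zero; a quick sanity-check sketch:
corr = pd.DataFrame(x_train_pca).corr().values
print(np.abs(corr - np.eye(corr.shape[0])).max())  # expected to be ~0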
# Distribution of the 3 principal components of the training set
pd.DataFrame(
    x_train_pca,
    columns=['1st principal component', '2nd principal component', '3rd principal component']
).hist(bins=50, figsize=(16, 9))
plt.show()
# Distribution of the 3 principal components of the test set
pd.DataFrame(
    x_test_pca,
    columns=['1st principal component', '2nd principal component', '3rd principal component']
).hist(bins=50, figsize=(16, 9))
plt.show()
# Initialize the model
model = KernelPCA(n_components=3, kernel='poly')  # Keep only the first 3 principal components
pca_kernel = PrincipalComponentAnalysis(model, X_train, X_test, kernel=True)
# Fit the model
pca_kernel.fit(scaler=StandardScaler())
C:\Users\hamza\AppData\Roaming\Python\Python39\site-packages\sklearn\base.py:443: UserWarning: X has feature names, but KernelPCA was fitted without feature names
# Apply the dimensionality reduction to the training and test data
x_train_pca_kernel = pca_kernel.x_train
x_test_pca_kernel = pca_kernel.x_test
# Distribution of the 3 principal components of the training set
pd.DataFrame(
    x_train_pca_kernel,
    columns=['1st principal component', '2nd principal component', '3rd principal component']
).hist(bins=50, figsize=(16, 9))
plt.show()
# Distribution of the 3 principal components of the test set
pd.DataFrame(
    x_test_pca_kernel,
    columns=['1st principal component', '2nd principal component', '3rd principal component']
).hist(bins=50, figsize=(16, 9))
plt.show()
# Values of the max_depth hyperparameter to explore during cross-validation
rf_parameters = {'rf__max_depth': np.arange(1, 101, 5)}
# Initialize the model
rf_model = RandomForestModel(
X_train,
X_test,
np.array(np.transpose(y_train)).reshape(-1),
np.array(np.transpose(y_test)).reshape(-1),
rf_parameters
)
# Train the model
init_time = time.time()
rf_model.train(scaler=StandardScaler(), cv=3)
training_time = time.time() - init_time
print('Training time: ', training_time)
Training time: 287.6037857532501
# Evaluate the model on the training and test data
rf_model.test()
Returned hyperparameter: {'rf__max_depth': 11}
Best accuracy in train is: 0.9968774223743292
Classification accuracy on test is: 1.0
plt.plot(np.arange(1, 101, 5), rf_model.clf.cv_results_['mean_test_score'])
plt.xlabel('max_depth parameter')
plt.ylabel('Accuracy')
plt.show()
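# A perfect test accuracy on 161 samples deserves a closer look; a confusion matrix shows
# which classes contribute errors. A sketch, assuming the wrapper exposes the fitted
# GridSearchCV (pipeline included) as rf_model.clf:
from sklearn.metrics import confusion_matrix
y_pred = rf_model.clf.predict(X_test)
print(confusion_matrix(np.array(y_test).reshape(-1), y_pred))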
rf_model_pca = RandomForestModel(
x_train_pca,
x_test_pca,
np.array(np.transpose(y_train)).reshape(-1),
np.array(np.transpose(y_test)).reshape(-1),
rf_parameters
)
init_time = time.time()
rf_model_pca.train(scaler=StandardScaler(), cv=3)
training_time = time.time() - init_time
print('Training time: ', training_time)
Training time: 20.64007019996643
rf_model_pca.test()
Returned hyperparameter: {'rf__max_depth': 71}
Best accuracy in train is: 0.9062202916355871
Classification accuracy on test is: 0.35403726708074534
plt.plot(np.arange(1, 101, 5), rf_model_pca.clf.cv_results_['mean_test_score'])
plt.xlabel('max_depth parameter')
plt.ylabel('Accuracy')
plt.show()
rf_model_pca_kernel = RandomForestModel(
x_train_pca_kernel,
x_test_pca_kernel,
np.array(np.transpose(y_train)).reshape(-1),
np.array(np.transpose(y_test)).reshape(-1),
rf_parameters
)
init_time = time.time()
rf_model_pca_kernel.train(scaler=StandardScaler(), cv=5)
training_time = time.time() - init_time
print('Training time: ', training_time)
Training time: 31.86460280418396
rf_model_pca_kernel.test()
Returned hyperparameter: {'rf__max_depth': 71}
Best accuracy in train is: 0.928125
Classification accuracy on test is: 0.6335403726708074
plt.plot(np.arange(1, 101, 5), rf_model_pca_kernel.clf.cv_results_['mean_test_score'])
plt.xlabel('max_depth parameter')
plt.ylabel('Accuracy')
plt.show()
svm_parameters = {'svc__C': np.logspace(-2, 2, 5, base=2)}
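# The grid np.logspace(-2, 2, 5, base=2) contains five powers of two, from 2^-2 to 2^2:
print(np.logspace(-2, 2, 5, base=2))  # [0.25 0.5  1.   2.   4.  ]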
svm_model = SVMModel(
X_train,
X_test,
np.array(np.transpose(y_train)).reshape(-1),
np.array(np.transpose(y_test)).reshape(-1),
svm_parameters
)
init_time = time.time()
svm_model.train(StandardScaler(), max_iter=3000, cv=3)
training_time = time.time() - init_time
print('Training time: ', training_time)
Training time: 154.05242919921875
svm_model.test()
Returned hyperparameter: {'svc__C': 0.25}
Best accuracy in train is: 0.998435054773083
Classification accuracy on test is: 1.0
# Analyze the model's performance across the tested values of the hyperparameter C
plt.plot(np.logspace(-2, 2, 5, base=2), svm_model.clf.cv_results_['mean_test_score'])
plt.xlabel('C parameter')
plt.ylabel('Accuracy')
plt.show()
svm_model_pca = SVMModel(
x_train_pca,
x_test_pca,
np.array(np.transpose(y_train)).reshape(-1),
np.array(np.transpose(y_test)).reshape(-1),
svm_parameters
)
init_time = time.time()
svm_model_pca.train(StandardScaler(), max_iter=3000, cv=3)
training_time = time.time() - init_time
print('Training time: ', training_time)
Training time: 0.7185711860656738
svm_model_pca.test()
Returned hyperparameter: {'svc__C': 4.0}
Best accuracy in train is: 0.8422184195515774
Classification accuracy on test is: 0.4782608695652174
# Analyze the model's performance across the tested values of the hyperparameter C
plt.plot(np.logspace(-2, 2, 5, base=2), svm_model_pca.clf.cv_results_['mean_test_score'])
plt.xlabel('C parameter')
plt.ylabel('Accuracy')
plt.show()
svm_model_pca_kernel = SVMModel(
x_train_pca_kernel,
x_test_pca_kernel,
np.array(np.transpose(y_train)).reshape(-1),
np.array(np.transpose(y_test)).reshape(-1),
svm_parameters
)
init_time = time.time()
svm_model_pca_kernel.train(StandardScaler(), max_iter=5000, cv=3)
training_time = time.time() - init_time
print('Training time: ', training_time)
Training time: 1.3066341876983643
svm_model_pca_kernel.test()
Returned hyperparameter: {'svc__C': 2.0}
Best accuracy in train is: 0.8093618826144824
Classification accuracy on test is: 0.4161490683229814
# Analyze the model's performance across the tested values of the hyperparameter C
plt.plot(np.logspace(-2, 2, 5, base=2), svm_model_pca_kernel.clf.cv_results_['mean_test_score'])
plt.xlabel('C parameter')
plt.ylabel('Accuracy')
plt.show()
lr_parameters = {'logreg__C': np.logspace(-2, 2, 5, base=2)}
lr_model = LogisticRegressionModel(
X_train,
X_test,
np.array(np.transpose(y_train)).reshape(-1),
np.array(np.transpose(y_test)).reshape(-1),
lr_parameters
)
init_time = time.time()
lr_model.train(StandardScaler(), max_iter=3000, cv=3)
training_time = time.time() - init_time
print('Training time: ', training_time)
Training time: 114.40310335159302
lr_model.test()
Returned hyperparameter: {'logreg__C': 0.25}
Best accuracy in train is: 0.9968701095461658
Classification accuracy on test is: 1.0
# Analyze the model's performance across the tested values of the hyperparameter C
plt.plot(np.logspace(-2, 2, 5, base=2), lr_model.clf.cv_results_['mean_test_score'])
plt.xlabel('C parameter')
plt.ylabel('Accuracy')
plt.show()
lr_model_pca = LogisticRegressionModel(
x_train_pca,
x_test_pca,
np.array(np.transpose(y_train)).reshape(-1),
np.array(np.transpose(y_test)).reshape(-1),
lr_parameters
)
init_time = time.time()
lr_model_pca.train(StandardScaler(), max_iter=3000, cv=3)
training_time = time.time() - init_time
print('Training time: ', training_time)
Training time: 0.5583958625793457
lr_model_pca.test()
Returned hyperparameter: {'logreg__C': 1.0}
Best accuracy in train is: 0.9046699720649963
Classification accuracy on test is: 0.4409937888198758
# Analyze the model's performance across the tested values of the hyperparameter C
plt.plot(np.logspace(-2, 2, 5, base=2), lr_model_pca.clf.cv_results_['mean_test_score'])
plt.xlabel('C parameter')
plt.ylabel('Accuracy')
plt.show()
lr_model_pca_kernel = LogisticRegressionModel(
x_train_pca_kernel,
x_test_pca_kernel,
np.array(np.transpose(y_train)).reshape(-1),
np.array(np.transpose(y_test)).reshape(-1),
lr_parameters
)
init_time = time.time()
lr_model_pca_kernel.train(StandardScaler(), max_iter=3000, cv=3)
training_time = time.time() - init_time
print('Training time: ', training_time)
Training time: 0.6514723300933838
lr_model_pca_kernel.test()
Returned hyperparameter: {'logreg__C': 4.0}
Best accuracy in train is: 0.9218624310765945
Classification accuracy on test is: 0.35403726708074534
# Analyze the model's performance across the tested values of the hyperparameter C
plt.plot(np.logspace(-2, 2, 5, base=2), lr_model_pca_kernel.clf.cv_results_['mean_test_score'])
plt.xlabel('C parameter')
plt.ylabel('Accuracy')
plt.show()
# Create the data loaders
train_tensor = TensorDataset(
torch.tensor(X_train.values.astype(np.float32)),
torch.tensor(y_train.values.astype(np.float32))
)
trainloader = DataLoader(
train_tensor,
batch_size=32,
shuffle=True
)
test_tensor = TensorDataset(
torch.tensor(X_test.values.astype(np.float32)),
torch.tensor(y_test.values.astype(np.float32))
)
testloader = DataLoader(
test_tensor,
batch_size=32,
shuffle=False
)
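# Note: the targets were mapped to 1-5, while PyTorch's nn.CrossEntropyLoss expects
# 0-indexed long class indices; the NeuralNetwork class presumably shifts them internally.
# A sketch of building 0-indexed targets directly, assuming that convention:
train_tensor_0idx = TensorDataset(
    torch.tensor(X_train.values.astype(np.float32)),
    torch.tensor(y_train.values.reshape(-1) - 1, dtype=torch.long)
)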
# Define the model and the training parameters
neural_network = NeuralNetwork(
input_size=20531,
hidden_sizes=[1024, 256],
output_size=5
)
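# The NeuralNetwork class also lives in lib/ and is not reproduced here. A plausible
# minimal architecture matching its constructor signature is sketched below; the ReLU
# activations and layer layout are assumptions, not the project's actual code:
import torch.nn as nn

class NeuralNetworkSketch(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super().__init__()
        layers, prev = [], input_size
        for h in hidden_sizes:
            layers += [nn.Linear(prev, h), nn.ReLU()]
            prev = h
        layers.append(nn.Linear(prev, output_size))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)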
learning_rate = 0.001
num_epochs = 5
optimizer = optim.SGD(neural_network.parameters(), lr=learning_rate, momentum=0.9)
# Train the model
neural_network.train_model(trainloader, optimizer, num_epochs)
Training Epoch [1/5]: 100%|████████████████████| 20/20 [00:04<00:00, 4.52it/s]
Training Epoch [2/5]: 100%|████████████████████| 20/20 [00:03<00:00, 6.52it/s]
Training Epoch [3/5]: 100%|████████████████████| 20/20 [00:03<00:00, 6.52it/s]
Training Epoch [4/5]: 100%|████████████████████| 20/20 [00:03<00:00, 6.47it/s]
Training Epoch [5/5]: 100%|████████████████████| 20/20 [00:03<00:00, 5.90it/s]
# Evaluate the model
neural_network.test_model(testloader)
Test in progress: 100%|█| 6/6 [00:00<00:00, 35.50it/s, test_acc=1, test_loss=0]
(0.0, 1.0)